## Estimation of a Multinomial Logit Model by Maximum Likelihood using mnlogit and Fixed point subroutine 
## with inds split into two halfs based on random index forming beliefs about occupational choices from only those from the sam half inform the PML objective function
## by Jose-Alberto Guerra
##    Created   09/2019
##    Modified  13/09/2019
##    Uses: R version 3.1.2 (install old version of R, by holding down the Control key during the launch of RStudio you can use 64-bit 3.1.2 version)
#Proof2Rnew.dta comes from border_reg_weighted4v2.do, notice we have eliminated agricultural
#Proof2RCoordnew.dta is Proof2Rnew but adding the coordinates for objectid_1 points. It comes from TemporyPointsCoordinatesv1.do
#to deal with spatial data visualitaion in R see http://spatial.ly/r/
##Check lines below Robustness, they should be commented out for our basic specification
## Where we guarantee that KS probabilities are within [0,1]
## We guarantee inner loop (fixed point expectations) convergence using tol 10e-10 for the sup-norm stopping criterion
## We follow KS 2012 and KS 2018 using outer loop iterations k=50 and guaranteeing inner loop convergence (see above)
## half should be set as:
## 1. half <- h1 for first half
## 2. half <- h2 for second half
## It should be run after Mlogit_NAPPHeterov4.R after activating both robustness related to h1 and h2
###########################################################################
rm(list=ls(all=TRUE))
#########################
####    Preamble     ####
#########################

###########################################################################
#1. DATA
###########################################################################
###########################################################################



###########################
###Heterogenous Beliefs (i.e. ``network'' interactions, correlated effects and fixed point beliefs subroutine)
###########################
rm(list=ls(all=TRUE))
set.seed(12345)
#root <- "S:/NetworkParish/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu
#if( AM==1 ){  rootsave <- paste(root,"Results/EEA/",sep="") } else   {rootsave <- paste(root,"Results/EEA/KS/",sep="")}
if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
#codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Codes/RCodes/EEACodes/" #codes <- "C:/HPC/CODE/"
#source(paste(codes,"Mlogit_NAPPHeteroFunctions.R",sep=""))
half <- "h2" #"h1"
sink(paste(rootsave,"NAPPEstimationHetenewhalf",half,".txt",sep=""))

#######
#Load Data 

# Identify those individuals whose index belongs to studied half
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnew",half,".RData",sep="" ))
HHhalf <- unique(pooleddata$ID)
IDSocialhalf <- unique(pooleddata$IDSocial)

#Calling Data see Mlogit_NAPPHeterov3.R
#load(file=paste(root,"NAPP/ReStat/PooledNAPPIDnewwcontinuous.RData",sep="" )) #PooledNAPPID
#load(file=paste(root,"NAPP/ReStat/NAPPXlistnewwcontinuous.RData",sep="" )) #NAPPXlist
#load(file=paste(root,"NAPP/ReStat/coordslistNAPPnewwcontinuous.RData",sep="" )) #coordslist

load(file=paste(root,"NAPP/ReStat/pooleddataNAPPneww50list.RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
IDSocialall <- unique(pooleddata$IDSocial)
#Identify position and ID of parishes not included in the half exercise
PosNIIDSocial <- which(!(IDSocialall %in% IDSocialhalf))
PosIIDSocial <- which(IDSocialall %in% IDSocialhalf)
NIIDSocial <- IDSocialall[PosNIIDSocial]
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomoneww50list.RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPneww50list.RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomoneww50list.RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/datalistNAPPneww50list.RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
load(file=paste(root,"NAPP/ReStat/coordslistNAPPneww50list.RData",sep="" )) #coordslist, Lists of coordinates per parish
load(file=paste(root,"NAPP/ReStat/dneighboursNAPPneww50list.RData",sep="")) #dneighbours, Lists of neigbours identities per parish
load(file=paste(root,"NAPP/ReStat/wlistNAPPneww50list.RData",sep="")) #wlist, Sparse Lists of neighbours per parish

#Restricting data to those individuals in half
pooleddata <- pooleddata[!(pooleddata$IDSocial %in% NIIDSocial),]
pooleddatahomo <- pooleddatahomo[!(pooleddatahomo$IDSocial %in% NIIDSocial),]
pooleddatalist <- pooleddatalist[!(pooleddatalist$IDSocial %in% NIIDSocial),]
pooleddatalisthomo <- pooleddatalisthomo[!(pooleddatalisthomo$IDSocial %in% NIIDSocial),]
datalist <- datalist[(PosIIDSocial)]
coordslist <- coordslist[(PosIIDSocial)]
dneighbours <- dneighbours[(PosIIDSocial)]
wlist <- wlist[(PosIIDSocial)]

#To save data paste(root,"Mola Data/",sep="")
save(pooleddata, file=paste(root,"NAPP/ReStat/pooleddataNAPPnewhalf",half,".RData",sep="" )) #Data per parish in long format (sw) pooled together
save(pooleddatahomo, file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonewhalf",half,".RData",sep="" )) #Data per parish in long format (sw) pooled together for homogeneous beliefs
save(pooleddatalist, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnewhalf",half,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together
save(pooleddatalisthomo, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonewhalf",half,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together for the homogeneous case
save(datalist, file=paste(root,"NAPP/ReStat/datalistNAPPnewhalf",half,".RData",sep="" )) #Lists of data per parish in wide format (sw.1,sw.2,...)
save(coordslist, file=paste(root,"NAPP/ReStat/coordslistNAPPnewhalf",half,".RData",sep="" )) #Lists of coordinates per parish
save(dneighbours, file=paste(root,"NAPP/ReStat/dneighboursNAPPnewhalf",half,".RData",sep="")) #Lists of neigbours identities per parish
save(wlist, file=paste(root,"NAPP/ReStat/wlistNAPPnewhalf",half,".RData",sep="")) #Sparse Lists of neighbours per parish

options(max.print=1000000)
gc()
print("Heterogeneous  beliefs: ``network'' interactions, correlated effects and fixed point beliefs subroutine")

#define which variables we include in the estimation
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc" "SEX",
wvar <- "sw"

pooleddatahalf <- pooleddata[pooleddata$ID %in% HHhalf,]
## Naive Estimation Heterogeneous
###################
print("Naive Estimation")
gc()
#1. x_i  
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(xvars, collapse= "+"),"|", wvar ,sep="")))          
ml.w <- mnlogit(fformula,pooleddatahalf,"alt")
save(ml.w,file=paste("Mlwnewhalf",half,".RData",sep=""))
print("w, x_i")
print(summary(ml.w))
#2. x_i, x_p
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep="")),"|", wvar ,sep="")))          
ml.om.w <- mnlogit(fformula, pooleddatahalf,"alt")
save(ml.om.w,file=paste("Mlomwnewhalf",half,".RData",sep=""))
print("w, x_i x_p")
print(summary(ml.om.w))
#3. x_i, x_p, t_b
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
ml.m.w <- mnlogit(fformula, pooleddatahalf,"alt")
save(ml.m.w,file=paste("Mlmwnewhalf",half,".RData",sep=""))
print("w, x_i x_p t_b")
print(summary(ml.m.w))

#4. x_i, x_p, t_s
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep=""))) 
ml.mt.w <- mnlogit(fformula, pooleddatahalf,"alt")
save(ml.mt.w,file=paste("Mlmtwnewhalf",half,".RData",sep=""))
#0 When robustness we have to change the ending of the file to accomodate one of the following
# #wexernei COULD BE atleast1 w100list nmovers age1530 w50list atmost10
#1. end of robust
print("w, x_i x_p t_p")
print(summary(ml.mt.w))
sink()

## Nested Pseudo likelihood Estimation Heterogeneous
###################

#source(paste(codes,"Mlogit_NAPPHeterofformula1v2Split-jackknifebeliefs.R",sep=""))